#################################################################
# Start from a clean slate: remove every object from the global environment.
rm(list=ls())
#################################################################
# Dependencies
#################################################################
# global
library(dplyr)
library(magrittr)
library(rvest)

# Make the directory of the currently active RStudio document the working
# directory, so the relative paths below ('../input', '../output') resolve
# against this script's location. This relies on rstudioapi and therefore
# on the script being run from within RStudio.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
getwd()

# local
# NOTE(review): presumably defines regexify_names2(), which is called below
# but not defined in this file — confirm.
source('regexify-names-fx.R')

### create additional variables for candidate data
# load candidate data
df <- read.csv(
  '../input/candidates_list_final_2019.csv',
  stringsAsFactors = FALSE
) %>%
  as_tibble()
str(df)

# shape metadata and add the regex term
#   id         : zero-padded row number (4 digits), number of characters in
#                firstname (2 digits) and in lastname (2 digits), followed
#                by the zip value
#   name       : "<firstname> <lastname>"
#   name.regex : value returned by regexify_names2() for each
#                firstname/lastname pair
df <- df %>%
  mutate(
    id = paste0(
      sprintf("%04d", row_number()),
      sprintf("%02d", nchar(firstname)),
      sprintf("%02d", nchar(lastname)),
      zip
    ),
    name = paste(firstname, lastname),
    #name.rev=paste(lastname, firstname),
    #name.regex=sapply(name.rev, function(x) regexify_names(x, method='loose')),
    # Walk the firstname/lastname columns of the data being mutated pair by
    # pair, so the regex terms are always aligned with the rows they are
    # attached to. unlist() flattens the per-row results into one vector and
    # unname() drops the names Map() derives from firstname.
    name.regex = unname(unlist(Map(regexify_names2, firstname, lastname)))
  )

write.csv(df, '../output/00-Named_Entity_List_withID.csv', row.names = FALSE)


#### create list with party names in different languages
# create party table
# One tibble (columns: abbrv, partyName) is scraped per page; the three pages
# are the _de, _fr and _it variants of the same politik-stat.ch table.
plist <- lapply(
  c(
    'http://www.politik-stat.ch/2015pa_de.html',
    'http://www.politik-stat.ch/2015pa_fr.html',
    'http://www.politik-stat.ch/2015pa_it.html'
  ),
  function(url) {
    # text content of every node carrying the CSS class "data"
    cells <- html_text(html_nodes(read_html(url), '.data'))
    # strip non-breaking spaces (code point 160), then drop cells left empty
    cells <- gsub(intToUtf8(160), "", cells)
    cells <- cells[!cells == ""]
    # remaining cells alternate: odd positions -> abbrv, even -> partyName
    odd_idx <- seq(1, length(cells), 2)
    even_idx <- seq(2, length(cells), 2)
    tibble(abbrv = cells[odd_idx], partyName = cells[even_idx])
  }
)

# party abbreviations occurring in the candidate data (for visual comparison)
sort(unique(df$party_short))

# Labels attached to the rows of the scraped party table, in table order.
# NA leaves the corresponding row unlabelled.
# NOTE(review): presumably these are the party_short values from the
# candidate data that correspond to each scraped row — confirm against the
# output of the line above.
bindr <- c('FDP', 'CVP', 'SP', 'SVP', NA, NA, 'EVP', 'CSP', 'glp', 'BDP', 'PdA', NA, NA, NA, NA, 'Grüne', 'SD', NA, NA, 'Lega')

# The labels are matched to table rows purely by position, so a change in the
# number of scraped rows would silently shift or drop labels. Flag it.
if (length(bindr) != length(plist[[1]]$abbrv)) {
  warning(
    "bindr has ", length(bindr), " labels but the scraped party table has ",
    length(plist[[1]]$abbrv), " rows; labels may be misaligned.",
    call. = FALSE
  )
}

# For every table row, collect the entries of the three scraped tables
# (in the order they were scraped) into one character vector of length 3.
abbrv <- setNames(
  lapply(
    seq_along(plist[[1]]$abbrv),
    function(i) c(plist[[1]]$abbrv[i], plist[[2]]$abbrv[i], plist[[3]]$abbrv[i])
  ),
  bindr
)
partyName <- setNames(
  lapply(
    seq_along(plist[[1]]$partyName),
    function(i) {
      c(plist[[1]]$partyName[i], plist[[2]]$partyName[i], plist[[3]]$partyName[i])
    }
  ),
  bindr
)

# Replace the per-page tables by the combined lookup lists and persist them.
plist <- list(abbrv = abbrv, partyName = partyName)
saveRDS(plist, '../output/00-partyList.RDS')


